rm(list=ls())
############ initialize  ##########################
setwd('/Volumes/Disk2/Future_PM')
library(fields); library(maps); library(ncdf4);library(abind)
open.ncdf=nc_open;get.var.ncdf=ncvar_get;close.ncdf=nc_close

source('Function/get_geo.R')
source('Function/get_met.R')
source('Function/functions.R')
ss=load("Data/PM_region.Rdata")
time=seq(as.Date('1999-01-01'),as.Date('2013-12-31'),by='month')
date=cbind(as.numeric(format(time,"%Y")),as.numeric(format(time,"%m")))

#================= Part 1================================
load("Data/monthly-prate_1999-2013.Rdata")#precip
	spdata=array(NA,dim(precip))
	for (month in 1:12){
		  ind=(date[,2]==month)
		  y= precip[,,ind]
		  for (i in 1:dim(y)[1]){
			  for (j in 1:dim(y)[2]){
				  spdata[i,j,ind]= mov.detrend(y[i,j,])
			  }
		  }
	  }	
precip=spdata

surf_T=read_met('air.mon.mean.nc')
surf_RH=read_met('rhum.mon.mean.nc')
surf_uwind=read_met('uwnd.mon.mean.nc')
surf_vwind=read_met('vwnd.mon.mean.nc')
T= surf_T$data
RH=surf_RH$data
u0=surf_uwind$data
v0=surf_vwind$data
lon.out= surf_T$lon-360
lat.out= surf_T$lat

load("Data/PM25_1999_2013_invdist_2.5x2.5_500km.Rdata")
total_PM[total_PM>=200]=200
total_PM[total_PM<=0]=0
for (year in 1999:2013){
	for (month in 1:12){
		ind=(total_time[,1]==year & total_time[,2]==month)
		num=apply(!is.na(total_PM[,,ind]),c(1,2),sum,na.rm=TRUE)
		avg=apply(total_PM[,,ind],c(1,2),mean,na.rm=TRUE)
		avg[num<=9]=NA
		if (year==1999 & month==1){
			monthly_PM=avg
		} else {
			monthly_PM=abind(monthly_PM,avg,along=3)
		}
	}
}

monthly_PM.std=array(NA,dim(monthly_PM))
for (month in 1:12){
	ind=(date[,2]==month)
	PM=monthly_PM[,,ind]
	for (i in 1:length(lon.frame)){
		for (j in 1:length(lat.frame)){
			monthly_PM.std[i,j,ind]= mov.detrend(PM[i,j,])
		}
	}
}
US_region= PM.region==1

#================= Part 2================================
s1=combn(5,1);s1=rbind(s1,array(NA,c(4,choose(5,1))))
s2=combn(5,2);s2=rbind(s2,array(NA,c(3,choose(5,2))))
s3=combn(5,3);s3=rbind(s3,array(NA,c(2,choose(5,3))))
s4=combn(5,4);s4=rbind(s4,array(NA,c(1,choose(5,4))))
s5=combn(5,5);
local.candidates=cbind(s1,s2,s3)

s1=combn(7,1);s1=rbind(s1,array(NA,c(4,choose(7,1))))
s2=combn(7,2);s2=rbind(s2,array(NA,c(3,choose(7,2))))
s3=combn(7,3);s3=rbind(s3,array(NA,c(2,choose(7,3))))
s4=combn(7,4);s4=rbind(s4,array(NA,c(1,choose(7,4))))
s5=combn(7,5);
hybrid.candidates=cbind(s1,s2,s3)

local.PM=array(NA,dim(monthly_PM.std))
hybrid.PM=array(NA,dim(monthly_PM.std))
local.best_weights=array(NA,c(length(lon.frame),length(lat.frame),12,5))
hybrid.best_weights=array(NA,c(length(lon.frame),length(lat.frame),12,5))

for (month in 1:12){
print(month)	
correlation=array(NA,c(28,11))
time.ind1=(date[,2]>=(month-1)  & date[,2]<=(month+1))
time.ind2=(date[,2]>=(month-1-12)  & date[,2]<=(month+1-12))
time.ind3=(date[,2]>=(month-1+12)  & date[,2]<=(month+1+12))
time.ind=(time.ind1 | time.ind2 | time.ind3)	

for (ii in 3:28){
	for (jj in 1:11){
		if ( US_region[ii,jj]){
			
		print(c(month, ii, jj))	
		local.y_pred=array(NA,c(45,dim(local.candidates)[2]))
		hybrid.y_pred=array(NA,c(45,dim(hybrid.candidates)[2]))			
		index= monthly_PM.std[ii,jj,time.ind]		
		xx=date[time.ind,]

		loc=find.lon.lat(lon.frame[ii],lat.frame[jj],lon.out,lat.out)
		ind1=seq(loc[1]-6,loc[1]+6)
		ind2=seq(max(1,loc[2]-4),min(loc[2]+4,length(lat.out)))
		size=c(length(ind1),length(ind2),5)
		temp_T=aperm(apply(T[ind1,ind2, time.ind],c(1,2),scale,center=TRUE,scale=TRUE),c(2,3,1))
		temp_RH=aperm(apply(RH[ind1,ind2, time.ind],c(1,2),scale,center=TRUE,scale=TRUE),c(2,3,1))		
		temp_u0=aperm(apply(u0[ind1,ind2, time.ind],c(1,2),scale,center=TRUE,scale=TRUE),c(2,3,1))
		temp_v0=aperm(apply(v0[ind1,ind2, time.ind],c(1,2),scale,center=TRUE,scale=TRUE),c(2,3,1))
		temp_precip=aperm(apply(precip[ind1,ind2, time.ind],c(1,2),scale,center=TRUE,scale=TRUE),c(2,3,1))
		
		total_met=array(NA,c(length(ind1),length(ind2),size[3],length(index)))
	    total_met[,,1,]= temp_T
		total_met[,,2,]= temp_RH
		total_met[,,3,]= temp_u0
		total_met[,,4,]= temp_v0
		total_met[,,5,]= temp_precip
		x1=scale(T[loc[1],loc[2],time.ind],center=TRUE,scale=TRUE)
		x2=scale(RH[loc[1],loc[2],time.ind],center=TRUE,scale=TRUE)
		x3=scale(u0[loc[1],loc[2],time.ind],center=TRUE,scale=TRUE)
		x4=scale(v0[loc[1],loc[2],time.ind],center=TRUE,scale=TRUE)
		x5=scale(precip[loc[1],loc[2],time.ind],center=TRUE,scale=TRUE)

		new.total_met=array(NA,c(length(ind1)*length(ind2),size[3],length(index)))
		for (i in 1:length(index)){
			new.total_met[,,i]=matrix(total_met[,,,i], nrow=size[1]*size[2], ncol=size[3], byrow=FALSE)
		}			
											
		for (year in 1999:2013){
		train.ind=(xx[,1]!=year)
		test.ind =(xx[,1]==year)
		
		corr1=find.cor2(temp_T[,,train.ind], index[train.ind] ,p_value=0.15)
		corr2=find.cor2(temp_RH[,,train.ind], index[train.ind] ,p_value=0.15)	
		corr3=find.cor2(temp_u0[,,train.ind], index[train.ind] ,p_value=0.15)
		corr4=find.cor2(temp_v0[,,train.ind], index[train.ind],p_value=0.15)
		corr5=find.cor2(temp_precip[,,train.ind], index[train.ind],p_value=0.15)		
		F=abind(corr1, corr2, corr3,corr4,corr5,along=3);F[is.na(F)]=0
		size=dim(F)
		new.F<- matrix(F, nrow=size[1]*size[2], ncol=size[3], byrow=FALSE)
		ss=svd(new.F)
		uu=ss$u   
		vv=ss$v   
		dd=ss$d   

		PC=array(NA,c(length(index),2))
		for (t in 1:sum(time.ind)){
			for (v in 1:2){
			day.met= new.total_met[,,t]
			day.met[is.na(day.met)]=0
			PC[t,v]=t(uu[,v])%*%day.met%*%vv[,v]		
			}
		}

#--------- local met -------------------
old=data.frame(y=index[train.ind],z1=x1[train.ind],z2=x2[train.ind],z3=x3[train.ind],z4=x4[train.ind],z5=x5[train.ind])
new=data.frame(z1=x1[test.ind],z2=x2[test.ind],z3=x3[test.ind],z4=x4[test.ind],z5=x5[test.ind])
for(t in 1:dim(local.candidates)[2]){
	ap=local.candidates[,t];ap=ap[!is.na(ap)]
	xnam=paste("z",ap,sep="")
	fmla=as.formula(paste("y~",paste(xnam,collapse="+")))
	fit<-lm(fmla,data=old)	
	local.y_pred[test.ind,t]=predict(fit,new)
}

#--------- hybrid met -------------------
old=data.frame(y=index[train.ind],z1=x1[train.ind],z2=x2[train.ind],z3=x3[train.ind],z4=x4[train.ind],z5=x5[train.ind],z6=PC[train.ind,1],z7=PC[train.ind,2])
new=data.frame(z1=x1[test.ind],z2=x2[test.ind],z3=x3[test.ind],z4=x4[test.ind],z5=x5[test.ind],z6=PC[test.ind,1],z7=PC[test.ind,2])
for(t in 1:dim(hybrid.candidates)[2]){
	ap=hybrid.candidates[,t];ap=ap[!is.na(ap)]
	xnam=paste("z",ap,sep="")
	fmla=as.formula(paste("y~",paste(xnam,collapse="+")))
	fit<-lm(fmla,data=old)	
	hybrid.y_pred[test.ind,t]=predict(fit,new)
}
}
#-------best weights--------------------
  	  weights=array(NA,dim(local.candidates)[2]);
	  for(i in 1:dim(local.candidates)[2]){
	  	MAT=na.omit(cbind(index,local.y_pred[,i]))
	  	weights[i]=cor(MAT[,1],MAT[,2])
	  } 
	 temp.ind=(date[,2]==month) 
     local.PM[ii,jj,temp.ind]=local.y_pred[,which.max(weights)][xx[,2]==month]
	 local.best_weights[ii,jj,month,]=local.candidates[,which.max(weights)]
		
  	weights=array(NA,dim(hybrid.candidates)[2]);
	  for(i in 1:dim(hybrid.candidates)[2]){
	  	MAT=na.omit(cbind(index,hybrid.y_pred[,i]))
	  	weights[i]=cor(MAT[,1],MAT[,2])
	  } 
	 temp.ind=(date[,2]==month) 
     hybrid.PM[ii,jj,temp.ind]= hybrid.y_pred[,which.max(weights)][xx[,2]==month]
	 hybrid.best_weights[ii,jj,month,]= hybrid.candidates[,which.max(weights)]	  

}
}
}
	par(mar=c(3,2,1,3))
	par(mfrow=c(2,2))	
	ap2=find.cor(monthly_PM.std, local.PM,plim=1)
	plot.field(ap2^2,lon.frame,lat.frame,type='def',zlim=c(0,0.7),legend.mar=4.5,mai=c(0.2,0.2,0.2,0.2))
	title('(a) local')
	ap3=find.cor(monthly_PM.std, hybrid.PM,plim=1)
	plot.field(ap3^2,lon.frame,lat.frame,type='def',zlim=c(0,0.7),legend.mar=4.5,mai=c(0.2,0.2,0.2,0.2))
	title('(b) hybrid')
	plot.field(ap3^2-ap2^2,lon.frame,lat.frame,type='sign',zlim=c(-0.2,0.2),legend.mar=4.5,mai=c(0.2,0.2,0.2,0.2))
	title('(c) hybrid - local')
	points(lon.frame[ii],lat.frame[jj],pch=16)
}

mean(ap3^2-ap2^2,na.rm=TRUE)/mean(ap3^2,na.rm=TRUE)
setwd('Figures/Figure 3')
save('local.best_weights','hybrid.best_weights','date','local.PM','hybrid.PM','monthly_PM.std','lon.frame','lat.frame',file='monthly_CV.Rdata')
